% stewart_new_2007: conference paper (JCDL '07) on OCR of classical Greek and
% on building corpora from very large collections.
% The internal-note field is not a standard field, so styles ignore it. It keeps
% the value that used to sit in the printed note field (looks like an export
% artifact, possibly a citation count -- TODO confirm, then drop).
@inproceedings{stewart_new_2007,
	author = {Stewart, Gordon and Crane, Gregory and Babeu, Alison},
	title = {A New Generation of Textual Corpora: Mining Corpora from Very Large Collections},
	booktitle = {Proceedings of the 7th {ACM/IEEE-CS} Joint Conference on Digital Libraries},
	series = {{JCDL} '07},
	year = {2007},
	pages = {356--365},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {978-1-59593-644-8},
	doi = {10.1145/1255175.1255247},
	url = {http://doi.acm.org/10.1145/1255175.1255247},
	keywords = {ancient greek, text alignment, {OCR} evaluation},
	abstract = {This article considers OCR programs developed for reading classical Greek. The authors of this paper work to show how computational correction practices can create text documents with accuracy ratings comparable to ``hand-crafted corpora.'' Three challenges of Greek OCR documents are identified: exclusion of variant readings, exclusion of multiple editions, and inability to draw connections between texts that reference each other. The authors point to work done, individually, in all of these areas by citing different digital archives and versioning software. However, these three challenges are not addressed by a single project. In order to grapple with this, the authors structure a multi-tiered approach to OCRing Greek texts. The authors discovered that simple error correction techniques based on word lists and a morphological analyzer improve results, and that including multiple editions increases accuracy. In closing, the authors point towards trajectories of future work, including image quality, comparison errors, and recognizing accents.},
	internal-note = {00017},
}
% crane_new_2000: short conference paper (DL '00) arguing for graduate training
% of "corpus editors" who curate collections too large to edit by hand.
% The internal-note field is not a standard field, so styles ignore it. It keeps
% the value that used to sit in the printed note field (looks like an export
% artifact, possibly a citation count -- TODO confirm, then drop).
@inproceedings{crane_new_2000,
	author = {Crane, Gregory and Rydberg-Cox, Jeffrey A.},
	title = {New Technology and New Roles: The Need for ``Corpus Editors''},
	booktitle = {Proceedings of the Fifth {ACM} Conference on Digital Libraries},
	series = {{DL} '00},
	year = {2000},
	pages = {252--253},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {1-58113-231-X},
	doi = {10.1145/336597.336686},
	url = {http://doi.acm.org/10.1145/336597.336686},
	keywords = {corpus linguistics, editing, Hypertext},
	abstract = {This article discusses the new set of skills necessary for an editor in the digital age, specifically focusing on the corpus editor who is unable to manually work through each document in the collection due to its size. Crane and Rydberg-Cox argue for the importance of creating proper graduate education for these editors to develop the necessary skills to work with machines that perform tasks such as automatic tagging of elements of the text. The technology for employing such automated tasks exists and the corpus editor should have computational competence and area knowledge to maneuver it into handling tasks of varying difficulty and setting the necessary prerequisites for different fields. Working with these digital techniques and having the corpus editor survey the results when necessary and edit the more complicated parts instead would go a long way towards improving the practice. The Perseus library started taking steps in this direction by supporting postgraduate scholars, but the authors call for a more comprehensive approach to fill this gap.},
	internal-note = {00000},
}
